Clean up environment and load motor vehicle thefts data into mvt

# Start from an empty workspace and resolve relative paths from the course
# directory.
rm(list = ls())
setwd("~/EdX/AE")

# Motor vehicle thefts data; text columns stay character instead of factor.
mvt <- read.csv(
  file = "./Data/mvt.csv",
  stringsAsFactors = FALSE
)

Parse dates in “12/31/12 22:00” format and derive the weekday and hour

# Parse the timestamp text with strptime(), then read the weekday name and the
# hour field off the parsed value.
mvt$Date <- strptime(x = mvt$Date, format = "%m/%d/%y %H:%M")
mvt$Weekday <- weekdays(x = mvt$Date)
mvt$Hour <- mvt$Date$hour

Count thefts per weekday, save the counts as a data frame, and plot them with ggplot

# Tabulate thefts by weekday name; the plots below use the resulting Var1
# (weekday) and Freq (count) columns.
WeekdayCounts <- as.data.frame(table(mvt$Weekday))

library("ggplot2")

# group = 1 joins all weekdays into a single line.
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))

Convert Var1 to an ordered factor (Sunday through Saturday) and replot

# Re-level Var1 so the x axis runs Sunday through Saturday, then redraw.
WeekdayCounts$Var1 <- factor(
  WeekdayCounts$Var1,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  ),
  ordered = TRUE
)
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1))

Plot Line Styles

# Weekday line chart with descriptive axis labels.
ggplot(data = WeekdayCounts, mapping = aes(x = Var1, y = Freq)) +
  geom_line(mapping = aes(group = 1)) +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

# Dashed variant of the weekday line chart (linetype = 2).
# The x-axis label is spelled "Day of the Week", matching the labeled weekday
# chart earlier in this section.
ggplot(data = WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1), linetype = 2) +
  ggtitle("linetype=2") +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

#ggplot(WeekdayCounts, aes(x=Var1, y=Freq)) + geom_line(aes(group=1), linetype=2) + xlab("Day of the Week") + ylab("Total Motor Vehicle Thefts" + ggtitle("linetype=2"))
# Semi-transparent variant of the weekday line chart (alpha = 0.3).
# The x-axis label is spelled "Day of the Week", matching the labeled weekday
# chart earlier in this section.
ggplot(data = WeekdayCounts, aes(x = Var1, y = Freq)) +
  geom_line(aes(group = 1), alpha = 0.3) +
  ggtitle("alpha=0.3") +
  xlab("Day of the Week") +
  ylab("Total Motor Vehicle Thefts")

#ggplot(WeekdayCounts, aes(x=Var1, y=Freq)) + geom_line(aes(group=1), alpha=0.3) + labs(list(title = "alpha=0.3", x = "Day of Weeek", y = "Total Motor Vehicle Thefts")))
#p + labs(list(title = "Title", x = "X", y = "Y"))

Determine the number of thefts per hour by tabulating Weekday vs. Hour, convert the table to a data frame with a numeric Hour column, and plot

# Cross-tabulate thefts by weekday (rows) and hour of day (columns).
table(mvt[["Weekday"]], mvt[["Hour"]])
##            
##                0    1    2    3    4    5    6    7    8    9   10   11
##   Friday    1873  932  743  560  473  602  839 1203 1268 1286  938  822
##   Monday    1900  825  712  527  415  542  772 1123 1323 1235  971  737
##   Saturday  2050 1267  985  836  652  508  541  650  858 1039  946  789
##   Sunday    2028 1236 1019  838  607  461  478  483  615  864  884  787
##   Thursday  1856  816  696  508  400  534  799 1135 1298 1301  932  731
##   Tuesday   1691  777  603  464  414  520  845 1118 1175 1174  948  786
##   Wednesday 1814  790  619  469  396  561  862 1140 1329 1237  947  763
##            
##               12   13   14   15   16   17   18   19   20   21   22   23
##   Friday    1207  857  937 1140 1165 1318 1623 1652 1736 1881 2308 1921
##   Monday    1129  824  958 1059 1136 1252 1518 1503 1622 1815 2009 1490
##   Saturday  1204  767  963 1086 1055 1084 1348 1390 1570 1702 2078 1750
##   Sunday    1192  789  959 1037 1083 1160 1389 1342 1706 1696 2079 1584
##   Thursday  1093  752  831 1044 1131 1258 1510 1537 1668 1776 2134 1579
##   Tuesday   1108  762  908 1071 1090 1274 1553 1496 1696 1816 2044 1458
##   Wednesday 1225  804  863 1075 1076 1289 1580 1507 1718 1748 2093 1511
# Long-format counts: Var1 is the weekday, Var2 the hour, Freq the count.
DayHourCounts <- as.data.frame(table(mvt$Weekday, mvt$Hour))

# Go through character so the hour labels become their numeric values.
DayHourCounts$Hour <- as.numeric(as.character(DayHourCounts$Var2))

# One line per weekday.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(mapping = aes(group = Var1))

Plot options

# Hourly theft lines, one per weekday, each weekday in its own color and drawn
# thicker (size = 2).
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Freq)) +
  geom_line(
    mapping = aes(group = Var1, color = Var1),
    size = 2
  )

# Label each row "Weekend" (Saturday or Sunday) or "Weekday" so the lines can
# be colored by that two-level type.
DayHourCounts$Type <- ifelse(
  DayHourCounts$Var1 %in% c("Saturday", "Sunday"),
  "Weekend",
  "Weekday"
)

# The x aesthetic is Hour, so the axis is labeled as hour of day rather than
# day of week.
ggplot(data = DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1, color = Type), size = 2) +
  ggtitle("Segregate Weekends Colored by Type") +
  xlab("Hour of the Day") +
  ylab("Total Motor Vehicle Thefts")

#ggplot(data = DayHourCounts, aes(x = Hour, y = Freq)) +
 #geom_line(aes(group=Var1), color=Type, size=2) +
 #gtitle("Segregate Weekends Colored by Type"))
#ggplot(DayHourCounts, aes(x=Hour, y=Freq)) + geom_line(aes(group=Var1, color=Type) + ggtitle("Segregate Weekends\Colored by Type"), size=2) 
# Same weekend/weekday chart with semi-transparent lines (alpha = 0.2).
# The x aesthetic is Hour, so the axis is labeled as hour of day rather than
# day of week.
ggplot(data = DayHourCounts, aes(x = Hour, y = Freq)) +
  geom_line(aes(group = Var1, color = Type), size = 2, alpha = 0.2) +
  ggtitle("Segregate Weekends Colored by Type") +
  xlab("Hour of the Day") +
  ylab("Total Motor Vehicle Thefts")

#ggplot(DayHourCounts, aes(x=Hour, y=Freq)) + geom_line(aes(group=Var1, color=Type) + ggtitle("alpha=.2 transparency"), size=2, alpha=0.2) 

Define Mon-Sun order for days, and create heatmaps

# Put the weekdays in calendar order (Monday first) for the heatmap's y axis.
DayHourCounts$Var1 <- factor(
  DayHourCounts$Var1,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  ),
  ordered = TRUE
)

# Heatmap: one tile per weekday/hour cell, filled according to the count.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  ggtitle("frequency displayed as fill intensity")

#ggplot(DayHourCounts, aes(x = Hour, y = Var1)) + geom_tile(aes(fill = Freq) + ggtitle("frequency displayed as fill intensity"))

# Heatmap with a named fill legend and no y-axis title.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(name = "Total MV Thefts") +
  ggtitle("with scale_fill_gradient") +
  theme(axis.title.y = element_blank())

# Same heatmap with explicit gradient endpoints: white for the lowest counts,
# red for the highest.
ggplot(data = DayHourCounts, mapping = aes(x = Hour, y = Var1)) +
  geom_tile(mapping = aes(fill = Freq)) +
  scale_fill_gradient(
    name = "Total MV Thefts",
    low = "white",
    high = "red"
  ) +
  ggtitle("define low crime = white") +
  theme(axis.title.y = element_blank())

Load and display Chicago map

library("maps")
library("ggmap")

# Fetch a map for the location "chicago" at zoom level 11, then draw it.
chicago <- get_map(location = "chicago", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=chicago&sensor=false
ggmap(chicago)

Plot first 100 motor vehicle thefts on map

# Overlay the first 100 rows of mvt as points on the Chicago map.
ggmap(chicago) +
  geom_point(
    data = mvt[1:100, ],
    mapping = aes(x = Longitude, y = Latitude)
  ) +
  ggtitle("Motor Vehicle Thefts In Chicago")
## Warning in loop_apply(n, do.ply): Removed 7 rows containing missing values
## (geom_point).

#ggmap(chicago) + geom_point(data = mvt[1:100,], aes(x = Longitude, y = Latitude) )#+ ggtitle("Motor Vehicle Thefts In Chicago"))

#labs(x = 'Longitude', y = 'Latitude') + ggtitle('Baylor University')

Count thefts by area (coordinates rounded to two decimals), store the counts in the LatLonCounts data frame with numeric longitude/latitude columns, and plot them on the map

# Count thefts per grid cell, where a cell is a (longitude, latitude) pair
# rounded to two decimal places.
LatLonCounts <- as.data.frame(
  table(round(mvt$Longitude, 2), round(mvt$Latitude, 2))
)

# Var1 / Var2 hold the rounded coordinates as labels; convert them to numbers.
LatLonCounts$Long <- as.numeric(as.character(LatLonCounts$Var1))
LatLonCounts$Lat <- as.numeric(as.character(LatLonCounts$Var2))

# One point per cell; both color and point size encode the count.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  ggtitle("Motor Vehicle Thefts In Chicago")
## Warning in loop_apply(n, do.ply): Removed 615 rows containing missing
## values (geom_point).

#ggmap(chicago) + geom_point(data = LatLonCounts, aes(x = Long, y = Lat, color = Freq, size=Freq) +ggtitle("ChicagoCrime by Area"))
# Count map with a yellow (low) to red (high) color scale.
# The title uses "\n" to put the legend hint on its own line; a backslash
# followed by a space is not a valid escape in an R string and fails to parse.
ggmap(chicago) +
  geom_point(
    data = LatLonCounts,
    aes(x = Long, y = Lat, color = Freq, size = Freq)
  ) +
  ggtitle("scale_color_gradient\nyellow=low; red=high") +
  scale_colour_gradient(low = "yellow", high = "red")
## Warning in loop_apply(n, do.ply): Removed 615 rows containing missing
## values (geom_point).

# Tile version of the count map: red tiles whose opacity follows the count.
ggmap(chicago) +
  geom_tile(
    data = LatLonCounts,
    mapping = aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )

Remove zero-count grid cells (e.g., locations over water)

# Keep only the grid cells with at least one recorded theft. Row subsetting
# carries the existing numeric Long and Lat columns along, so they are not
# rebuilt from Var1 and Var2 here.
LatLonCounts2 <- LatLonCounts[LatLonCounts$Freq > 0, ]

# Tile map restricted to the non-empty cells.
ggmap(chicago) +
  ggtitle("Chicago has no water crime") +
  geom_tile(
    data = LatLonCounts2,
    aes(x = Long, y = Lat, alpha = Freq),
    fill = "red"
  )

Clean up environment and load murder data and US map

# Reset the workspace before the murder-data section; attached packages stay
# loaded.
rm(list = ls())
murders <- read.csv(file = "./Data/murders.csv")

# Map data for the "state" map.
statesMap <- map_data(map = "state")

# Blank outline map: white fill, black borders.
ggplot(data = statesMap, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

Merge statesMap data and murder data, and plot

# statesMap is joined on its `region` column, so build a lowercase `region`
# key from murders$State to join on.
murders$region <- tolower(murders$State)
murderMap <- merge(x = statesMap, y = murders, by = "region")

# Map shaded by the Murders column.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Murders)
) +
  ggtitle("Murders in the US by State") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

# Map shaded by the Population column.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = Population)
) +
  ggtitle("Population in US") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")

# Murders per 100,000 population.
murderMap$MurderRate <- murderMap$Murders / murderMap$Population * 100000

# Map shaded by murder rate on a white-to-red scale.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = MurderRate)
) +
  ggtitle("Murder Rate") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "white", high = "red", guide = "legend")

# Murder-rate map with the fill scale restricted to the range 0-10.
# NOTE(review): the title suggests DC is the value that falls outside these
# limits - confirm against the data.
# geom_polygon() takes `color` for the border; `white` is not a parameter it
# recognizes, so the state outlines were not being drawn in black.
ggplot(murderMap, aes(x = long, y = lat, group = group, fill = MurderRate)) +
  ggtitle("Murders in the US sans DC") +
  geom_polygon(color = "black") +
  scale_fill_gradient(
    low = "white",
    high = "red",
    guide = "legend",
    limits = c(0, 10)
  )

# GunOwnership divided by Population, scaled by 100,000.
# NOTE(review): this assumes GunOwnership is a raw count; if the column is
# already a proportion, dividing by Population is not meaningful - confirm
# against the data dictionary.
murderMap$GunRate <- murderMap$GunOwnership / murderMap$Population * 100000

# Map shaded by GunRate on a white-to-red scale.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = GunRate)
) +
  ggtitle("Gun Ownership by State") +
  geom_polygon(color = "black") +
  scale_fill_gradient(low = "white", high = "red", guide = "legend")

# Overwrite GunRate with the natural log of GunOwnership / Population.
# NOTE(review): this assumes GunOwnership is a raw count - confirm against the
# data dictionary.
murderMap$GunRate <- log(murderMap$GunOwnership / murderMap$Population)

# Map shaded by the log rate: black-to-red fill with white state borders.
ggplot(
  data = murderMap,
  mapping = aes(x = long, y = lat, group = group, fill = GunRate)
) +
  ggtitle("Rate of Gun Ownership") +
  geom_polygon(color = "white") +
  scale_fill_gradient(low = "black", high = "red", guide = "legend")